{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "parallelized-data.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "machine_shape": "hm",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/lmoroney/tfbook/blob/master/chapter4/parallelized-data.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zX4Kg8DUTKWO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "# you may not use this file except in compliance with the License.\n",
        "# You may obtain a copy of the License at\n",
        "#\n",
        "# https://www.apache.org/licenses/LICENSE-2.0\n",
        "#\n",
        "# Unless required by applicable law or agreed to in writing, software\n",
        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
        "# See the License for the specific language governing permissions and\n",
        "# limitations under the License."
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "coY1OkmCnT_8",
        "colab_type": "text"
      },
      "source": [
        "Good to run this to ensure you are using TF2.x"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab_type": "code",
        "id": "ioLbtB3uGKPX",
        "colab": {}
      },
      "source": [
        "try:\n",
        "  # %tensorflow_version only exists in Colab.\n",
        "  %tensorflow_version 2.x\n",
        "except Exception:\n",
        "  pass"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "yUG2oehtYM5N",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# At time of creation, tfds was at version 2.0 which had\n",
        "# some bugs in some common datasets. Advise to update to\n",
        "# tfds 2.1.0 like this:\n",
        "pip install tensorflow_datasets==2.1.0"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ni967hO0XExW",
        "colab_type": "text"
      },
      "source": [
        "# Create the Model Architecture\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "iMssw0vfXYvm",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "import tensorflow_datasets as tfds\n",
        "import tensorflow_addons as tfa\n",
        "import numpy as np\n",
        "import multiprocessing\n",
        "\n",
        "def create_model():\n",
        "  model = tf.keras.models.Sequential([\n",
        "    tf.keras.layers.Conv2D(16, (3,3), activation='relu', \n",
        "                                      input_shape=(300, 300, 3)),\n",
        "    tf.keras.layers.MaxPooling2D(2, 2),\n",
        "    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),\n",
        "    tf.keras.layers.MaxPooling2D(2,2),\n",
        "    tf.keras.layers.Flatten(),\n",
        "    tf.keras.layers.Dense(512, activation='relu'),\n",
        "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
        "  ])\n",
        "\n",
        "\n",
        "  model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
        "  return model"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "umcmHaluXnyS",
        "colab_type": "text"
      },
      "source": [
        "# EXTRACT\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "F9WDOP5pXpwV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train_data = tfds.load('cats_vs_dogs', split='train', with_info=True)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "0Oc7a3QLYMsm",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "file_pattern = f'/root/tensorflow_datasets/cats_vs_dogs/4.0.0/cats_vs_dogs-train.tfrecord*'\n",
        "files = tf.data.Dataset.list_files(file_pattern)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BCcSOPhaZcqH",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train_dataset = files.interleave(tf.data.TFRecordDataset, cycle_length=4, num_parallel_calls=tf.data.experimental.AUTOTUNE)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "GFCiG5BNXxx_",
        "colab_type": "text"
      },
      "source": [
        "# TRANSFORM\n"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "WDcg3ckCXxGu",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def read_tfrecord(serialized_example):\n",
        "  feature_description={\n",
        "      \"image\": tf.io.FixedLenFeature((), tf.string, \"\"),\n",
        "      \"label\": tf.io.FixedLenFeature((), tf.int64, -1),\n",
        "  }\n",
        "  example = tf.io.parse_single_example(serialized_example, feature_description)\n",
        "  image = tf.io.decode_jpeg(example['image'], channels=3)\n",
        "  image = tf.cast(image, tf.float32)\n",
        "  image = image / 255\n",
        "  image = tf.image.resize(image, (300,300))\n",
        "  return image, example['label']"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HcR0q3afaSz4",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "cores = multiprocessing.cpu_count()\n",
        "print(cores)\n",
        "\n",
        "train_dataset = train_dataset.map(read_tfrecord, num_parallel_calls=cores)\n",
        "\n",
        "#train_dataset = train_dataset.cache()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TaPzTWBAagEe",
        "colab_type": "text"
      },
      "source": [
        "# LOAD"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2PDFOEWXafoQ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "train_dataset = train_dataset.shuffle(1024).batch(32)\n",
        "\n",
        "train_dataset = train_dataset.prefetch(tf.data.experimental.AUTOTUNE)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "PzyJ_DqvarUl",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "model = create_model()\n",
        "model.fit(train_dataset, epochs=10, verbose=1)"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}
